library(GEOquery)
library(crayon)
library(tidyverse)
library(Seurat)
library(SeuratData)
library(Azimuth)
library(patchwork)
library(Matrix)
library(RCurl)
library(DoubletFinder)
library(scales)
library(SoupX)
library(cowplot)
library(metap)
library(SingleCellExperiment)
library(DropletUtils)
library(AnnotationHub)
library(HGNChelper)
library(ensembldb)
library(multtest)
library(glmGamPoi)
library(pbapply)
library(data.table)
library(ggrepel)
library(ggpubr)
library(stringr)
library(canvasXpress)
library(clinfun)
library(GGally)
library(gridExtra)
library(factoextra)
library(DESeq2)
library(EnhancedVolcano)
library(ComplexHeatmap)
library(limma)
library(fgsea)
library(org.Mm.eg.db)
library(kableExtra)
GEO Accession: GSE264162
Publication Title: Lung injury-induced activated endothelial cell states persist in aging-associated progressive fibrosis (2024)
Authors: Ahmed A. Raslan, Tho X. Pham, Jisu Lee, Konstantinos Kontodimas, Andrew Tilston-Lunel, Jillian Schmottlach, Jeongmin Hong, Taha Dinc, Andreea M. Bujor, Nunzia Caporarello, Aude Thiriot, Ulrich H. von Andrian, Steven K. Huang, Roberto F. Nicosia, Maria Trojanowska, Xaralabos Varelas, Giovanni Ligresti
Publication: Nature Communications
Rationale: Progressive lung fibrosis is associated with poorly understood aging-related endothelial cell dysfunction. Pulmonary fibrosis is characterized by alveolar damage and the accumulation of collagen-producing fibroblasts that contribute to excessive extracellular matrix deposition and loss of organ function. Endothelial cells from aged mouse lungs with sustained fibrosis exhibit reduced expression of endothelial identity genes and increased expression of inflammatory- and fibrosis-associated genes compared to young animals.
Subpopulations of activated endothelial cell are prevalent in lungs of aged mice and can also be detected in human fibrotic lungs. Longitudinal single cell RNA-sequencing combined with lineage tracing reveal that injury-associated endothelial states transiently appear during the peak of collagen production and vanish during the resolution phase in young lungs. Conversely, endothelial cells derived from aged lungs persist in the activated states and are topologically restricted to fibrotic areas, indicating a failure of the aged vascular endothelial cells to return to quiescence. Differential transcriptional analysis together with pathway evaluation identify putative genes and signaling pathways associated with metabolism, such as glycolysis and oxidative phosphorylation, and upstream regulators, such as MYC, mTOR and YAP/TAZ that likely contribute to the appearance and pathogenic persistence of activated endothelial cells with aging. YAP/TAZ can cooperate with BDNF, a TrkB ligand that is reduced in fibrotic lungs to promote capillary morphogenesis.
Methodology: Single-cell RNA profiling of whole lungs of young mice (2 months) and aged mice (18 months) with a single dose of intratracheal bleomycin instillation. Lungs were isolated at the early resolution phase of fibrosis (30 days post-bleomycin) and compared to saline-treated young and aged lungs. 15 distinct lung cell types were defined from approximately 52,542 cells with vascular endothelial cells being the most represented cellular subtype. The authors then reclustered the Cdh5+ and Pecam-1+ endothelial cells to identify different endothelial subpopulations. They identified 9 vascular endothelial cell subclusters including those form the veins, arteries, general capillaries (gCap) and aerocytes. Within these subclusters, there were 4 subclusters that emerged exclusively in bleomycin-treated lungs, which were defined as “activated” gCap endothelial cells, “activated” aCap endothelial cells, “activated” arterial endothelial cells and “activated” venous endothelial cells as they shared the common markers of cell activation, including Fxdy5, Spp1, Ankrd37, Lrg1, and Amd1.
# Specify project file path.
project_path = "~/Documents/Work/UnityHealth/Data_Projects/scRNAseq_LungFibrosisAnalysis/GSE264162/"
# Load pre-processed Seurat objects.
load(file = paste0(project_path, "data/preprocessed_seurat_obj.RData"))
# Set colour palette for cell type.
celltype_col = c("AT1" = "#2f4f4f",
"AT2" = "#00bfff",
"Basal resting" = "#2e8b57",
"Club (non-nasal)" = "#8b008b",
"Deuterosomal" = "#00ff00",
"Endothelial cells" = "#808000",
"Endothelial lympathic" = "#00008b",
"Fibroblasts" = "#ff4500",
"Fibromyocytes" = "#ffa500",
"Immune" = "#f08080",
"Ionocyte" = "#f0e68c",
"Multiciliated (non-nasal)" = "#ff00ff",
"Myofibroblasts" = "#c71585",
"Neuroendocrine" = "#00fa9a",
"Pericytes" = "#ff1493",
"Suprabasal" = "#eee8aa",
"Transitional Club-AT2" = "#9370db",
"VSMCs" = "#00ffff")
# Set new name for Seurat object to be more informative.
seurat_obj = seurat_cluster
# Collapse different types of endothelial cells into generic endothelial cells.
grep("Endothelial", seurat_obj$cell_type, value = TRUE) %>% unique()
## [1] "Endothelial capillary" "Endothelial arterial" "Endothelial lymphatic"
## [4] "Endothelial venous"
seurat_obj$cell_type <- gsub("Endothelial venous|Endothelial capillary|Endothelial arterial",
"Endothelial cells",
seurat_obj$cell_type)
# Collapse fibroblasts and myofibroblasts into fibroblasts.
grep("ibroblast", seurat_obj$cell_type, value = TRUE) %>% unique()
## [1] "Fibroblasts" "Myofibroblasts"
seurat_obj$cell_type <- gsub("Myofibroblasts",
"Fibroblasts",
seurat_obj$cell_type)
# Set Seurat object idents.
Idents(seurat_obj) <- "cell_type"
# Set max overlaps to infinity globally.
options(ggrepel.max.overlaps = Inf)
# Remove unneeded objects.
rm(seurat_cluster)
# Get the variables of interest.
summary_table <- (FetchData(seurat_obj,
vars = c("mice_id",
"comparison_var",
"treatment",
"age",
"timepoint",
"cell_type",
"Antxr1")))
# Each timepoint contains unsorted lung tissues from 5 mice.
# Define ANTXR1 expression based on 0 expression threshold.
summary_table <- summary_table %>% mutate(Antxr1_expressed = Antxr1 > 0)
# Calculate total cells per timepoint.
total_cells_timepoint_mice <- (summary_table %>%
group_by(mice_id, comparison_var) %>%
summarise(total_cells_timepoint = n(),
.groups = "drop"))
# Summarise cell counts per mice per timepoint for each cell type.
celltype_timepoint_counts_mice <- (summary_table %>%
group_by(mice_id, cell_type, comparison_var) %>%
summarise(celltype_count = n(),
Antxr1_total_exprs = sum(Antxr1_expressed),
.groups = "drop") %>%
left_join(total_cells_timepoint_mice,
by = c("mice_id", "comparison_var")))
# Calculate percentages.
celltype_timepoint_counts_mice <- (celltype_timepoint_counts_mice %>%
mutate(celltype_by_totalcells =
(celltype_count/total_cells_timepoint)*100,
Antxr1_by_totalcells =
(Antxr1_total_exprs/total_cells_timepoint)*100,
Antxr1_by_celltypes =
(Antxr1_total_exprs/celltype_count)*100))
# Calculate summary statistics for the calculated percentages by mice.
summary_stats <- (celltype_timepoint_counts_mice %>%
group_by(cell_type, comparison_var) %>%
summarise(
mean_percent_celltype = mean(celltype_by_totalcells),
sd_percent_celltype = sd(celltype_by_totalcells),
mean_percent_antxr1_total = mean(Antxr1_by_totalcells),
sd_percent_antxr1_total = sd(Antxr1_by_totalcells),
mean_percent_antxr1_celltype = mean(Antxr1_by_celltypes),
sd_percent_antxr1_celltype = sd(Antxr1_by_celltypes),
n_mice = n(),
se_percent_celltype = sd_percent_celltype / sqrt(n_mice),
se_percent_antxr1_total = sd_percent_antxr1_total / sqrt(n_mice),
se_percent_antxr1_celltype = sd_percent_antxr1_celltype / sqrt(n_mice),
.groups = "drop"
))
# Remove unneeded objects.
rm(total_cells_timepoint_mice)
# Visualize the global cell proportion as a UMAP.
p1 <- DimPlot(seurat_obj,
reduction = "umap",
group.by = "cell_type",
split.by = "comparison_var",
cols = celltype_col,
label = TRUE,
label.size = 3,
label.color = "#02066f",
repel = TRUE,
order = TRUE,
ncol = 3,
raster = FALSE) +
theme_linedraw() +
theme(axis.text.x = element_text(size = 8),
axis.text.y = element_text(size = 8),
strip.text = element_text(size = 15)) +
NoLegend()
# Visualize cell proportion counts as barplot.
p2 <- celltype_timepoint_counts_mice %>%
group_by(cell_type, comparison_var) %>%
summarise(n = n(),
mean = plyr::round_any(mean(celltype_by_totalcells),
accuracy = 0.01,
f = ceiling),
sd = plyr::round_any(sd(celltype_by_totalcells),
accuracy = 0.01,
f = ceiling),
.groups = "drop") %>%
mutate(se = plyr::round_any(sd/sqrt(n), accuracy = 0.01, f = ceiling)) %>%
ggplot(aes(x = cell_type,
y = mean,
fill = cell_type)) +
geom_col(show.legend = FALSE) +
geom_errorbar(aes(ymin = mean-se,
ymax = mean+se),
width = 0.4,
alpha = 0.9) +
geom_text(aes(label = mean,
vjust = 0.5,
hjust = -0.8),
size = 5,
angle = 90) +
facet_wrap(~comparison_var, ncol = 3) +
labs(x = "Cell type",
y = "% mean cell type for each timepoint
(Blank = > 0.001 or 0.1%)",
fill = element_blank()) +
ylim(0, 100) +
scale_fill_manual(values = celltype_col) +
theme_linedraw() +
theme(axis.text.x = element_text(size = 12,
angle = 90,
hjust = 1,
vjust = 0.5),
strip.text = element_text(size = 15))
# Visualize both plots together.
grid.arrange(p1, p2, ncol = 1)
# Define list of cell types for each subgroups.
AT2 = WhichCells(seurat_obj, idents = c("AT2"))
fibroblasts = WhichCells(seurat_obj, idents = c("Fibroblasts"))
endothelial = WhichCells(seurat_obj, idents = c("Endothelial cells"))
# Visualize the global cell proportion as a UMAP.
p1 <- DimPlot(seurat_obj,
reduction = "umap",
group.by = "cell_type",
split.by = "comparison_var",
cells.highlight = list(AT2,
fibroblasts,
endothelial),
cols.highlight = c("#808000", "#ff4500", "#00bfff"),
label = TRUE,
label.size = 3,
label.color = "#02066f",
repel = TRUE,
order = TRUE,
ncol = 3,
raster = FALSE) +
theme_linedraw() +
theme(axis.text.x = element_text(size = 8),
axis.text.y = element_text(size = 8),
strip.text = element_text(size = 15)) +
NoLegend()
# Visualize cell proportion counts as barplot.
p2 <- celltype_timepoint_counts_mice %>%
dplyr::filter(cell_type %in% c("AT2",
"Fibroblasts",
"Endothelial cells")) %>%
group_by(cell_type, comparison_var) %>%
summarise(n = n(),
mean = plyr::round_any(mean(celltype_by_totalcells),
accuracy = 0.01,
f = ceiling),
sd = plyr::round_any(sd(celltype_by_totalcells),
accuracy = 0.01,
f = ceiling),
.groups = "drop") %>%
mutate(se = plyr::round_any(sd/sqrt(n), accuracy = 0.01, f = ceiling)) %>%
ggplot(aes(x = cell_type,
y = mean,
fill = cell_type)) +
geom_col(show.legend = FALSE) +
geom_errorbar(aes(ymin = mean-se,
ymax = mean+se),
width = 0.4,
alpha = 0.9) +
geom_text(aes(label = mean,
vjust = 0.5,
hjust = -0.8),
size = 5,
angle = 90) +
facet_wrap(~comparison_var, ncol = 3) +
labs(x = "Cell type",
y = "% mean cell type for each timepoint
(Blank = > 0.001 or 0.1%)",
fill = element_blank()) +
ylim(0, 100) +
scale_fill_manual(values = celltype_col) +
theme_linedraw() +
theme(axis.text.x = element_text(size = 12,
angle = 45,
hjust = 1,
vjust = 1),
strip.text = element_text(size = 15))
# Visualize both plots together.
grid.arrange(p1, p2, ncol = 1)
Acta2: a-SMA marker Col1a1 & Col1a2: Collagen type I marker Col3a1: Collagen type III marker Cthrc1: Prognostic biomarker for fibrosis
# Visualize the expressions of all cell type clusters.
VlnPlot(seurat_obj,
features = c("Acta2", "Col1a2", "Col3a1", "Col1a1", "Antxr1"),
group.by = "cell_type",
split.by = "treatment",
split.plot = FALSE,
stack = TRUE,
flip = TRUE,
same.y.lims = TRUE,
ncol = 1,
raster = FALSE) +
theme(legend.position = "top") +
stat_summary(fun.y = median,
geom = "point",
size = 1,
color = "#000001",
position = position_dodge(0.9))
## The default behaviour of split.by has changed.
## Separate violin plots are now plotted side-by-side.
## To restore the old behaviour of a single split violin,
## set split.plot = TRUE.
##
## This message will be shown once per session.
Acta2: a-SMA marker Col1a1 & Col1a2: Collagen type I marker Col3a1: Collagen type III marker Mmp2: Matrix degrading enzyme Cthrc1: Prognostic biomarker for fibrosis
# Visualize the expressions of all cell type clusters.
VlnPlot(seurat_obj,
features = c("Acta2", "Col1a2", "Col3a1", "Col1a1", "Mmp2", "Antxr1"),
group.by = "cell_type",
split.by = "comparison_var",
split.plot = FALSE,
stack = TRUE,
flip = TRUE,
same.y.lims = TRUE,
ncol = 1) +
theme(legend.position = "top") +
stat_summary(fun.y = median,
geom = "point",
size = 1,
color = "#000001",
position = position_dodge(0.9))
Acta2: a-SMA marker Col1a1 & Col1a2: Collagen type I marker Col3a1: Collagen type III marker Cthrc1: Prognostic biomarker for fibrosis
# Visualize the ANTXR1+ cell type clusters.
FeaturePlot(seurat_obj,
reduction = "umap",
features = "Antxr1",
split.by = "comparison_var",
pt.size = 0.8,
label = TRUE,
label.size = 3,
repel = TRUE,
order = TRUE,
ncol = 2,
cols = c("#d3d3d3", "#edc9af", "#6e260e")) +
patchwork::plot_layout(ncol = 3, nrow = 1) +
theme(axis.text.x = element_text(size = 8),
axis.text.y = element_text(size = 8),
strip.text = element_text(size = 15))
# Visualize ANTXR1 expressed by total cells as barplot.
celltype_timepoint_counts_mice %>%
group_by(cell_type, comparison_var) %>%
summarise(n = n(),
mean = plyr::round_any(mean(Antxr1_by_totalcells),
accuracy = 0.01,
f = ceiling),
sd = plyr::round_any(sd(Antxr1_by_totalcells),
accuracy = 0.01,
f = ceiling),
.groups = "drop") %>%
mutate(se = plyr::round_any(sd/sqrt(n),
accuracy = 0.01,
f = ceiling)) %>%
ggplot(aes(x = cell_type,
y = mean,
fill = cell_type)) +
geom_col(show.legend = FALSE) +
geom_errorbar(aes(ymin = mean-se,
ymax = mean+se),
width = 0.4,
alpha = 0.9) +
geom_text(aes(label = mean,
vjust = 0.5,
hjust = -0.8),
size = 5,
angle = 90) +
facet_wrap(~comparison_var) +
labs(x = "Cell type",
y = "% ANTXR1+ expressed per total cells for each timepoint
(Blank = > 0.001 or 0.1%)",
fill = element_blank()) +
ylim(0, 25) +
scale_fill_manual(values = celltype_col) +
theme_linedraw() +
theme(axis.text.x = element_text(size = 12,
angle = 90,
hjust = 1,
vjust = 0.5),
strip.text = element_text(size = 15))
# Visualize ANTXR1 expressed by total cells as barplot in selected cell types.
celltype_timepoint_counts_mice %>%
dplyr::filter(cell_type %in% c("AT2",
"Fibroblasts",
"Endothelial cells")) %>%
group_by(cell_type, comparison_var) %>%
summarise(n = n(),
mean = plyr::round_any(mean(Antxr1_by_totalcells),
accuracy = 0.01,
f = ceiling),
sd = plyr::round_any(sd(Antxr1_by_totalcells),
accuracy = 0.01,
f = ceiling),
.groups = "drop") %>%
mutate(se = plyr::round_any(sd/sqrt(n),
accuracy = 0.01,
f = ceiling)) %>%
ggplot(aes(x = cell_type,
y = mean,
fill = cell_type)) +
geom_col(show.legend = FALSE) +
geom_errorbar(aes(ymin = mean-se,
ymax = mean+se),
width = 0.4,
alpha = 0.9) +
geom_text(aes(label = mean,
vjust = 0.5,
hjust = -0.8),
size = 5,
angle = 90) +
facet_wrap(~comparison_var) +
labs(x = "Cell type",
y = "% ANTXR1+ expressed per total cells for each timepoint
(Blank = > 0.001 or 0.1%)",
fill = element_blank()) +
ylim(0, 25) +
scale_fill_manual(values = celltype_col) +
theme_linedraw() +
theme(axis.text.x = element_text(size = 12,
angle = 45,
hjust = 1,
vjust = 1),
strip.text = element_text(size = 15))
Acta2: a-SMA marker Col1a1 & Col1a2: Collagen type I marker Col3a1: Collagen type III marker Cthrc1: Prognostic biomarker for fibrosis
# Visualize the ANTXR1+ cell type clusters.
FeaturePlot(seurat_obj,
reduction = "umap",
features = "Antxr1",
split.by = "comparison_var",
pt.size = 0.8,
label = TRUE,
label.size = 3,
repel = TRUE,
order = TRUE,
cols = c("#d3d3d3", "#edc9af", "#6e260e")) +
patchwork::plot_layout(ncol = 3, nrow = 1) +
theme(axis.text.x = element_text(size = 8),
axis.text.y = element_text(size = 8),
strip.text = element_text(size = 15))
# Visualize ANTXR1 expressed by total cells per cell type as barplot.
celltype_timepoint_counts_mice %>%
group_by(cell_type, comparison_var) %>%
summarise(n = n(),
mean = plyr::round_any(mean(Antxr1_by_celltypes),
accuracy = 0.01,
f = ceiling),
sd = plyr::round_any(sd(Antxr1_by_celltypes),
accuracy = 0.01,
f = ceiling),
.groups = "drop") %>%
mutate(se = plyr::round_any(sd/sqrt(n),
accuracy = 0.01,
f = ceiling)) %>%
ggplot(aes(x = cell_type,
y = mean,
fill = cell_type)) +
geom_col(show.legend = FALSE) +
geom_errorbar(aes(ymin = mean-se,
ymax = mean+se),
width = 0.4,
alpha = 0.9) +
geom_text(aes(label = mean,
vjust = 0.5,
hjust = -0.8),
size = 5,
angle = 90) +
facet_wrap(~comparison_var) +
labs(x = "Cell type",
y = "% mean ANTXR1+ expressed per total cells
for each cell type at each timepoint
(Blank = > 0.001 or 0.1%)",
fill = element_blank()) +
ylim(0, 110) +
scale_fill_manual(values = celltype_col) +
theme_linedraw() +
theme(axis.text.x = element_text(size = 12,
angle = 90,
hjust = 1,
vjust = 0.5),
strip.text = element_text(size = 15))
# Visualize ANTXR1 expressed by total cells per cell type as barplot in select cell types.
celltype_timepoint_counts_mice %>%
dplyr::filter(cell_type %in% c("AT2",
"Fibroblasts",
"Endothelial cells")) %>%
group_by(cell_type, comparison_var) %>%
summarise(n = n(),
mean = plyr::round_any(mean(Antxr1_by_celltypes),
accuracy = 0.01,
f = ceiling),
sd = plyr::round_any(sd(Antxr1_by_celltypes),
accuracy = 0.01,
f = ceiling),
.groups = "drop") %>%
mutate(se = plyr::round_any(sd/sqrt(n),
accuracy = 0.01,
f = ceiling)) %>%
ggplot(aes(x = cell_type,
y = mean,
fill = cell_type)) +
geom_col(show.legend = FALSE) +
geom_errorbar(aes(ymin = mean-se,
ymax = mean+se),
width = 0.4,
alpha = 0.9) +
geom_text(aes(label = mean,
vjust = 0.5,
hjust = -0.8),
size = 5,
angle = 90) +
facet_wrap(~comparison_var) +
labs(x = "Cell type",
y = "% mean ANTXR1+ expressed per total cells
for each cell type at each timepoint
(Blank = > 0.001 or 0.1%)",
fill = element_blank()) +
ylim(0, 110) +
scale_fill_manual(values = celltype_col) +
theme_linedraw() +
theme(axis.text.x = element_text(size = 12,
angle = 45,
hjust = 1,
vjust = 1),
strip.text = element_text(size = 15))
# Subset cell type of interest - AT2 cells.
AT2_cells <- subset(seurat_obj, idents = "AT2", invert = FALSE)
AT2_cells$treatment <- factor(AT2_cells$treatment,
levels = c("Sham",
"Bleomycin"))
# Make expression comparisons between metadata of interest.
source("~/Documents/Work/UnityHealth/Data_Projects/VlnPlot_stat.R")
VlnPlot_stat(object = AT2_cells,
gene_signature = c("Antxr1"),
test_sign = list(c("Sham", "Bleomycin_Day 14"),
c("Sham", "Bleomycin_Day 35"),
c("Bleomycin_Day 14", "Bleomycin_Day 35")),
group_name = "comparison_var",
title = "ANTXR1 pairwise comparisons between control and BLM-treated AT2 cells
at different timepoints",
x_angle = 0,
hjust = 0.5,
vjust = 1)
# Remove unneeded objects.
rm(AT2_cells)
# Subset cell type of interest - Fibroblasts.
Fibro_cells <- subset(seurat_obj,
idents = c("Fibroblasts"),
invert = FALSE)
Fibro_cells$treatment <- factor(Fibro_cells$treatment,
levels = c("Sham",
"Bleomycin"))
# Make expression comparisons between metadata of interest.
source("~/Documents/Work/UnityHealth/Data_Projects/VlnPlot_stat.R")
VlnPlot_stat(object = Fibro_cells,
gene_signature = c("Antxr1"),
test_sign = list(c("Sham", "Bleomycin_Day 14"),
c("Sham", "Bleomycin_Day 35"),
c("Bleomycin_Day 14", "Bleomycin_Day 35")),
group_name = "comparison_var",
title = "ANTXR1 pairwise comparisons between control and BLM-treated fibroblasts
at different timepoints",
x_angle = 0,
hjust = 0.5,
vjust = 1)
# Remove unneeded objects.
rm(Fibro_cells)
# Subset cell type of interest - Endothelial cells.
EC_cells <- subset(seurat_obj,
idents = c("Endothelial cells"),
invert = FALSE)
EC_cells$treatment <- factor(EC_cells$treatment,
levels = c("Sham", "Bleomycin"))
# Make expression comparisons between metadata of interest.
source("~/Documents/Work/UnityHealth/Data_Projects/VlnPlot_stat.R")
VlnPlot_stat(object = EC_cells,
gene_signature = c("Antxr1"),
test_sign = list(c("Sham", "Bleomycin_Day 14"),
c("Sham", "Bleomycin_Day 35"),
c("Bleomycin_Day 14", "Bleomycin_Day 35")),
group_name = "comparison_var",
title = "ANTXR1 pairwise comparisons between control and BLM-treated
endothelial cells at different timepoints",
x_angle = 0,
hjust = 0.5,
vjust = 1)
# Remove unneeded objects.
rm(EC_cells)
# Check if default ident of the Seurat object is cell type.
all(Idents(seurat_obj) == seurat_obj$cell_type) # True. Default idents are cell types.
## [1] TRUE
# Aggregate counts based on cell_type, timepoint and accession ID (i.e. mice ID).
pseudo_seu <- AggregateExpression(seurat_obj,
assays = "RNA",
group.by = c("cell_type", "comparison_var", "orig.ident"),
return.seurat = TRUE)
## Names of identity class contain underscores ('_'), replacing with dashes ('-')
## Centering and scaling data matrix
##
## This message is displayed once every 8 hours.
# Use the new metadata column as the default ident.
head(Cells(pseudo_seu))
## [1] "AT1_Sham_GSM8213185-LG-AA-S1-1" "AT1_Sham_GSM8213185-LG-AA-S1-2"
## [3] "AT1_Sham_GSM8213186-LG-AA-S2-1" "AT1_Sham_GSM8213186-LG-AA-S2-2"
## [5] "AT1_Sham_GSM8213187-LG-AA-S3-1" "AT1_Sham_GSM8213187-LG-AA-S3-2"
# Create an aggregate variable with cell_type and timepoint.
pseudo_seu$deg_variable <- paste(pseudo_seu$cell_type, pseudo_seu$comparison_var, sep = "_")
# Set deg_variable as the default ident.
Idents(pseudo_seu) <- "deg_variable"
# Connect to AnnotationHub.
ah <- AnnotationHub()
# Access the Ensembl database for organism.
ahDb <- query(ah,
pattern = c("Mus musculus", "EnsDb"),
ignore.case = TRUE)
# Acquire the latest annotation files.
id <- ahDb %>%
mcols() %>%
rownames() %>%
tail(n = 1)
# Download the appropriate Ensembldb database.
edb <- ah[[id]]
## loading from cache
# Extract gene-level information from database.
annotations <- genes(edb,
return.type = "data.frame")
# Select annotations of interest.
annotations <- annotations %>%
dplyr::select(gene_id, gene_name, seq_name, gene_biotype, description)
# Remove unneeded objects.
rm(ah, ahDb, edb, id)
# Find differential expression genes between BLM day 3 and sham.
# Positive logFC means ident.1 is upregulated relative to ident.2.
grep("Fibroblasts", Idents(pseudo_seu), value = TRUE) %>% unique()
## [1] "Fibroblasts_Sham" "Fibroblasts_Bleomycin-Day 14"
## [3] "Fibroblasts_Bleomycin-Day 35"
fib_blm14_sham <- FindMarkers(pseudo_seu,
ident.1 = "Fibroblasts_Bleomycin-Day 14",
ident.2 = "Fibroblasts_Sham",
test.use = "DESeq2")
## converting counts to integer mode
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
head(fib_blm14_sham, n = 15)
## p_val avg_log2FC pct.1 pct.2 p_val_adj
## Fst 0.000000e+00 3.0963357 1 1 0.000000e+00
## Clic4 3.019963e-301 -1.1919503 1 1 6.808808e-297
## Bace2 5.280071e-269 -1.2998959 1 1 1.190445e-264
## Zfp36 3.088878e-258 -1.8164707 1 1 6.964184e-254
## Hhip 1.738824e-253 -2.7617473 1 1 3.920352e-249
## Igfbp6 2.297332e-252 -1.8671873 1 1 5.179566e-248
## Twsg1 4.245097e-223 -1.1849020 1 1 9.570996e-219
## Slc38a5 9.450215e-195 -1.3608502 1 1 2.130645e-190
## Fbln5 7.717827e-191 -1.8195661 1 1 1.740061e-186
## Trib1 2.962548e-185 -0.9990279 1 1 6.679360e-181
## Myl12b 8.667119e-183 -0.9360675 1 1 1.954089e-178
## Esd 2.675461e-179 1.2318158 1 1 6.032095e-175
## Des 7.655592e-177 2.1606927 1 1 1.726030e-172
## Gda 1.226824e-173 1.9214572 1 1 2.765997e-169
## Arpc1b 2.487397e-164 1.0250249 1 1 5.608085e-160
# Filter out genes that are p.adjusted > 0.05.
dim(fib_blm14_sham)[1] # 21,959 hits before filtering.
## [1] 21959
fib_blm14_sham_sig <- fib_blm14_sham[which(fib_blm14_sham$p_val_adj < 0.05),]
dim(fib_blm14_sham_sig)[1] # 4,032 after filtering for p-adjusted values.
## [1] 4032
# Add gene annotations to the DEG output.
fib_blm14_sham_sig <- fib_blm14_sham_sig %>% rownames_to_column("geneID")
fib_blm14_sham_sig <- dplyr::left_join(fib_blm14_sham_sig, annotations,
by = c("geneID" = "gene_name"))
# DEG output.
volcano_input = fib_blm14_sham_sig
EnhancedVolcano(volcano_input,
volcano_input$geneID,
x = "avg_log2FC",
y = "p_val_adj",
selectLab = c(volcano_input[c(volcano_input$p_val_adj < 1e-5 &
(volcano_input$avg_log2FC > 2.5 |
volcano_input$avg_log2FC < -2.5)),]$geneID,
"Antxr1"),
title = "Fibroblasts BLM Day 14 vs. Fibroblasts Sham",
pointSize = 1.0,
colAlpha = 0.8,
drawConnectors = TRUE,
legendPosition = "bottom",
legendLabSize = 12.0,
legendIconSize = 3.0)
## Warning: One or more p-values is 0. Converting to 10^-1 * current lowest
## non-zero p-value...
# Find differential expression genes between BLM day 35 and sham.
# Positive logFC means ident.1 is upregulated relative to ident.2.
grep("Fibroblasts", Idents(pseudo_seu), value = TRUE) %>% unique()
## [1] "Fibroblasts_Sham" "Fibroblasts_Bleomycin-Day 14"
## [3] "Fibroblasts_Bleomycin-Day 35"
fib_blm35_sham <- FindMarkers(pseudo_seu,
ident.1 = "Fibroblasts_Bleomycin-Day 35",
ident.2 = "Fibroblasts_Sham",
test.use = "DESeq2")
## converting counts to integer mode
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
head(fib_blm35_sham, n = 15)
## p_val avg_log2FC pct.1 pct.2 p_val_adj
## Gxylt2 1.033755e-39 -0.57894692 1 1.000 2.330704e-35
## Phlda3 9.945120e-35 0.87904892 1 1.000 2.242227e-30
## Errfi1 3.049106e-33 -0.59273333 1 1.000 6.874515e-29
## Cdkn1c 2.361653e-25 -0.33832840 1 1.000 5.324583e-21
## Trib1 8.415577e-25 -0.71436309 1 1.000 1.897376e-20
## Hoxa5 1.470757e-24 -0.46629287 1 1.000 3.315968e-20
## Bace2 1.109882e-22 -0.36513852 1 1.000 2.502339e-18
## Btg2 1.721902e-22 -0.70928564 1 1.000 3.882200e-18
## Ifrd1 2.716179e-22 -0.36011194 1 1.000 6.123898e-18
## Ube2c 2.815512e-22 1.17515202 1 0.833 6.347854e-18
## Bex4 1.236572e-21 -0.08739208 1 1.000 2.787976e-17
## Zfp36 2.752970e-21 -0.67443462 1 1.000 6.206845e-17
## Thbs4 5.520670e-21 1.38504611 1 0.333 1.244690e-16
## Omd 8.423518e-21 -0.41191327 1 1.000 1.899166e-16
## Spock2 8.661249e-21 -0.61240980 1 1.000 1.952765e-16
# Filter out genes that are p.adjusted > 0.05.
dim(fib_blm35_sham)[1] # 21,239 hits before filtering.
## [1] 21293
fib_blm35_sham_sig <- fib_blm35_sham[which(fib_blm35_sham$p_val_adj < 0.05),]
dim(fib_blm35_sham_sig)[1] # 463 after filtering for p-adjusted values.
## [1] 463
# Add gene annotations to the DEG output.
fib_blm35_sham_sig <- fib_blm35_sham_sig %>% rownames_to_column("geneID")
fib_blm35_sham_sig <- dplyr::left_join(fib_blm35_sham_sig, annotations,
by = c("geneID" = "gene_name"))
# DEG output.
volcano_input = fib_blm35_sham_sig
EnhancedVolcano(volcano_input,
volcano_input$geneID,
x = "avg_log2FC",
y = "p_val_adj",
selectLab = c(volcano_input[c(volcano_input$p_val_adj < 1e-4 &
(volcano_input$avg_log2FC > 1.5 |
volcano_input$avg_log2FC < -1.5)),]$geneID,
"Antxr1"),
title = "Fibroblasts BLM Day 35 vs. Fibroblasts Sham",
pointSize = 1.0,
colAlpha = 0.8,
drawConnectors = TRUE,
legendPosition = "bottom",
legendLabSize = 12.0,
legendIconSize = 3.0)
# Find differential expression genes between BLM day 35 and BLM day 14.
# Positive logFC means ident.1 is upregulated relative to ident.2.
grep("Fibroblasts", Idents(pseudo_seu), value = TRUE) %>% unique()
## [1] "Fibroblasts_Sham" "Fibroblasts_Bleomycin-Day 14"
## [3] "Fibroblasts_Bleomycin-Day 35"
fib_blm35_blm14 <- FindMarkers(pseudo_seu,
ident.1 = "Fibroblasts_Bleomycin-Day 35",
ident.2 = "Fibroblasts_Bleomycin-Day 14",
test.use = "DESeq2")
## converting counts to integer mode
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
head(fib_blm35_blm14, n = 15)
## p_val avg_log2FC pct.1 pct.2 p_val_adj
## Ces1d 0.000000e+00 1.885874 1 1 0.000000e+00
## Tcf21 0.000000e+00 2.061443 1 1 0.000000e+00
## Efemp1 0.000000e+00 1.821982 1 1 0.000000e+00
## Neat1 6.081612e-270 -1.139471 1 1 1.371160e-265
## Socs3 4.339790e-223 1.255577 1 1 9.784490e-219
## Myl12b 5.771588e-196 0.978013 1 1 1.301262e-191
## Pcolce2 3.992698e-174 2.575051 1 1 9.001936e-170
## Gstm1 7.722309e-162 1.631620 1 1 1.741072e-157
## Hsd11b1 1.340473e-157 1.157586 1 1 3.022231e-153
## Htra1 4.259766e-156 1.218977 1 1 9.604068e-152
## Pid1 1.685954e-126 1.617434 1 1 3.801152e-122
## Abca8a 2.477148e-114 1.242925 1 1 5.584979e-110
## Rarres2 6.081444e-111 1.229687 1 1 1.371122e-106
## Txnip 8.058787e-111 1.410337 1 1 1.816934e-106
## C7 3.130730e-110 1.549259 1 1 7.058545e-106
# Filter out genes that are p.adjusted > 0.05.
dim(fib_blm35_blm14)[1] # 22,096 hits before filtering.
## [1] 22096
fib_blm35_blm14_sig <- fib_blm35_blm14[which(fib_blm35_blm14$p_val_adj < 0.05),]
dim(fib_blm35_blm14_sig)[1] # 3,881 after filtering for p-adjusted values.
## [1] 3876
# Add gene annotations to the DEG output.
fib_blm35_blm14_sig <- fib_blm35_blm14_sig %>% rownames_to_column("geneID")
fib_blm35_blm14_sig <- dplyr::left_join(fib_blm35_blm14_sig, annotations,
by = c("geneID" = "gene_name"))
# DEG output.
volcano_input = fib_blm35_blm14_sig
EnhancedVolcano(volcano_input,
volcano_input$geneID,
x = "avg_log2FC",
y = "p_val_adj",
selectLab = c(volcano_input[c(volcano_input$p_val_adj < 1e-4 &
(volcano_input$avg_log2FC > 2 |
volcano_input$avg_log2FC < -2)),]$geneID,
"Antxr1"),
title = "Fibroblasts BLM Day 35 vs. Fibroblasts BLM Day 14",
pointSize = 1.0,
colAlpha = 0.8,
drawConnectors = TRUE,
legendPosition = "bottom",
legendLabSize = 12.0,
legendIconSize = 3.0)
## Warning: One or more p-values is 0. Converting to 10^-1 * current lowest
## non-zero p-value...
# Combine DEG results as a list.
deg_list = list(BLM14days_vs_Sham = fib_blm14_sham_sig$geneID,
BLM35days_vs_Sham = fib_blm35_sham_sig$geneID,
BLM35days_vs_BLM14days = fib_blm35_blm14_sig$geneID)
head(list_to_matrix(deg_list))
## BLM14days_vs_Sham BLM35days_vs_Sham BLM35days_vs_BLM14days
## 0610009L18Rik 1 0 0
## 0610012G03Rik 1 0 1
## 0610043K17Rik 1 0 1
## 1110004F10Rik 1 0 1
## 1110008P14Rik 1 0 1
## 1110018N20Rik 1 0 1
# Make the combination matrix.
m = make_comb_mat(deg_list)
m[1:4]
## A combination matrix with 3 sets and 4 combinations.
## ranges of combination set size: c(64, 2534).
## mode for the combination size: distinct.
## sets are on rows.
##
## Combination sets are:
## BLM14days_vs_Sham BLM35days_vs_Sham BLM35days_vs_BLM14days code size
## x x x 111 144
## x x 110 190
## x x 101 2534
## x x 011 64
##
## Sets are:
## set size
## BLM14days_vs_Sham 4032
## BLM35days_vs_Sham 463
## BLM35days_vs_BLM14days 3876
# Plot the interaction in an UpSet plot.
UpSet(m, set_order = c("BLM14days_vs_Sham",
"BLM35days_vs_Sham",
"BLM35days_vs_BLM14days"))
Analysis codes are adapted from HBCtraining and Ouyang Lab with credits going to the original authors of the publications cited in the book.